- Notifications
You must be signed in to change notification settings - Fork 849
/
Copy pathOML4Py Feature Engineering Aggregation.dsnb
executable file
·1 lines (1 loc) · 6.33 KB
/
OML4Py Feature Engineering Aggregation.dsnb
1
[{"layout":null,"template":null,"templateConfig":null,"name":"OML4Py Feature Engineering Aggregation","description":null,"readOnly":false,"type":"low","paragraphs":[{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":null,"title":null,"message":["%md"," "],"enabled":true,"result":null,"sizeX":0,"hideCode":true,"width":12,"hideResult":true,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":null,"message":["%md","","# OML4Py Feature Engineering: Aggregation","In this notebook, we demonstrate how to perform aggregation for min, max, mean, and count using OML4Py.","","We use the SH schema SALES table, which contains transaction records for each customer and products purchased. We create features by aggregating the amount sold for each customer and product pair.","","Copyright (c) 2024 Oracle Corporation ","###### <a href=\"https://oss.oracle.com/licenses/upl/\" onclick=\"return ! window.open('https://oss.oracle.com/licenses/upl/');\">The Universal Permissive License (UPL), Version 1.0<\/a>","---"],"enabled":true,"result":null,"sizeX":0,"hideCode":true,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"For more information ...","message":["%md","","* <a href=\"https://docs.oracle.com/en/cloud/paas/autonomous-data-warehouse-cloud/index.html\" target=\"_blank\">Oracle ADB Documentation<\/a>","* <a href=\"https://github.com/oracle-samples/oracle-db-examples/tree/main/machine-learning\" target=\"_blank\">OML folder on Oracle GitHub<\/a>","* <a href=\"https://www.oracle.com/machine-learning\" target=\"_blank\">OML Web Page<\/a>","* <a href=\"https://docs.oracle.com/en/database/oracle/machine-learning/oml4py/2/mlpug/cross-tabulate-data.html\" target=\"_blank\">OML4Py Cross-Tabulation<\/a>","* <a href=\"https://docs.oracle.com/en/database/oracle/machine-learning/oml4py/2/mlpug/combine-data.html\" target=\"_blank\">OML4Py Combining Data<\/a>"],"enabled":true,"result":null,"sizeX":0,"hideCode":true,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"Import Python libraries and set display options","message":["%python","","import warnings","warnings.filterwarnings('ignore')","","import pandas as pd","import oml","from oml import automl"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"table","title":"Get a proxy object to SALES data and view a few records","message":["%python","","SALES_DF = oml.sync(query = 'select cust_id, prod_id, amount_sold from sh.sales')","","z.show(SALES_DF.head())"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"table","title":"View multiple records for each customer and product pair with count","message":["%python","","z.show(SALES_DF.crosstab(['CUST_ID', 'PROD_ID']).head(10))"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"Compute cross tabulation using min, max, mean, and count for each customer and product","message":["%python","","MAX_DF = SALES_DF.crosstab(['CUST_ID', 'PROD_ID'], values = 'AMOUNT_SOLD', aggfunc = oml.DataFrame.max)","MIN_DF = SALES_DF.crosstab(['CUST_ID', 'PROD_ID'], values = 'AMOUNT_SOLD', aggfunc = oml.DataFrame.min)","MEAN_DF = SALES_DF.crosstab(['CUST_ID', 'PROD_ID'], values = 'AMOUNT_SOLD', aggfunc = oml.DataFrame.mean)","COUNT_DF = SALES_DF.crosstab(['CUST_ID', 'PROD_ID'], values = 'AMOUNT_SOLD', aggfunc = oml.DataFrame.count)"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"Join all results","message":["%python","","AGG_DF = MAX_DF.merge(MIN_DF, on = ['CUST_ID', 'PROD_ID'], how = 'inner', suffixes =['', '']) ","AGG_DF = AGG_DF.merge(MEAN_DF, on = ['CUST_ID', 'PROD_ID'], how = 'inner', suffixes =['', ''])","AGG_DF = AGG_DF.merge(COUNT_DF, on = ['CUST_ID', 'PROD_ID'], how = 'inner', suffixes =['', ''])"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"table","title":"View aggregated data","message":["%python","","z.show(AGG_DF.round(2).head(10))"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":null,"message":["%md","","# End of Script"],"enabled":true,"result":null,"sizeX":0,"hideCode":true,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":null,"message":["%md"],"enabled":true,"result":null,"sizeX":0,"hideCode":true,"width":12,"hideResult":true,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"}],"version":"6","snapshot":false,"tags":null}]